% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%            SIMULATION OF SCHOOL CHOICE DATA UNDER  DA                                           %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %
home
% ----------------------------------------------------------------------------------------------- %
%%                                      OPTIONS                                                   %
% ----------------------------------------------------------------------------------------------- %
% Set global timer
step_total_time=tic;

% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%%                                  PART I : MODEL SETUP                                          %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %
% Set timer Part I
step_part1 = tic;

% ----------------------------------------------------------------------------------------------- %
%                                 1.0/ MISTAKES                                                   %
% ----------------------------------------------------------------------------------------------- %

% Fraction of mistakes: payoff-relevant and payoff-irrelevant.
% mistake_pp = [-10,-2,-1,0,0.075,0.15,0.225,0.3]'; % refer to PA_dis; old version with 8 DGPs;

% mistake_pp = [-10,-1.5,-0.5,0.3]'; % refer to PA_dis; new version with 4 DGPs: TT, IRR1-2, REL
mistake_pp = [-10,-1.5,0.3]'; % refer to PA_dis; new version with 3 DGPs: TT, IRR, REL

P = length(mistake_pp);
% P=2;
% ----------------------------------------------------------------------------------------------- %
%                               1.1/ MODEL PARAMETERS                                             %
% ----------------------------------------------------------------------------------------------- %

% Model with I students and J schools
% M Monte Carlo sample replications to simulate unonstrained/constrained choice
% (Note: the same MC samples are used in both situations)

% ------------------ %
% SPECIFY PARAMETERS %
% ------------------ %

% propor: number of students/100
propor = 15;
% I: # of students
I = propor*120;
% J: # of schools
J = 12;
% Capacities: school capacities (normalized to total capacity of 100)
Capacities = propor*[10; 5; 10; 10; 5; 10; 10; 5; 10; 10; 5; 10];
% A: maximum size of ROL (A<=J)
A = J;
% rho: correlation of priorities across schools
rho = 1;
% M: # of MC samples
M = 150;
NSim = M;

M_CD = 1000; % number of simulations for cutoff distribution

% 3 types of coefficients: i) school fixed effects; ii) coefficient on distance; iii) own score x school score
% FE: school fixed effects
coeff_fe = 0.3;
FE = coeff_fe*[1:J] + 20;
% FE = [9, 9, 9.25, 9.5, 9.75, 10, 10.25, 10.5, 10.75, 11, 11.25, 11.5];
% coeff_dist: coefficient on distance
coeff_dist = -1;
% coeff_char: coefficient on binary x odd numbered schools
coeff_char = 2;
% vector of dummies for special schools (odd-number schools)
Sch_special = [1,0,1,0,1,0,1,0,1,0,1,0];
% small school status
Sch_small = [0,1,0,0,1,0,0,1,0,0,1,0];

% --------------------- %
% True parameter values %
% --------------------- %

% theta_true = (FE - FE(1))';
% theta_true = theta_true(2:end);
% Number of covariates
% if coeff_char ~= 0
%     theta_true = [theta_true; coeff_char];
% end
% if coeff_dist ~= 0
%     theta_true = [theta_true; coeff_dist];
% end

theta_true = [coeff_fe; coeff_char; coeff_dist];

% ----------------- %
% CHECK CONSISTENCY %
% ----------------- %

% Check: I/J must be an integer. If not, exit program
if mod(I,J) ~=0 && isequal(Capacities,(I/J).*ones(J,1))
    disp('Execution stopped: I/J must be an integer.')
    % To exit keyboard, type -dbquit-
    keyboard
end

% Check : A<=J
if (A>J)
    disp('Error: A > J')
    keyboard % To exit keyboard, type -dbquit-
end

% ----------------------------------------------------------------------------------------------- %
%                                  1.3/ STUDENT PRIORITIES                                        %
% ----------------------------------------------------------------------------------------------- %

% ------------------- %
%     PRIORITIES      %
% ------------------- %

% student score
MC.Priority = (1:I)'/I; % same priority at each school.
% MC.Stu_char = MC.Priority+rand(I,1)/5;
rng(1101);
% MC.Stu_char = repmat(MC.Priority,1,M)+rand(I,M)/2;

% Student binary characteristics
MC.Stu_bin = ((rand(I,M)<2/3) .* repmat((MC.Priority < 1/2),1,M)); % minorities
MC.Stu_char = MC.Stu_bin;
% -- for simulation of cutoff distribution
rng(1102);
CD.Priority = MC.Priority;
CD.Stu_char = ((rand(I,M_CD)<2/3) .* repmat((CD.Priority < 1/2),1,M_CD));

% ----------------------------------------------------------------------------------------------- %
%                                   1.4/ DISTANCE TO SCHOOL                                       %
% ----------------------------------------------------------------------------------------------- %

% 1/ Schools' geographic coordinates
% ----------------------------------

% N.B.: schools are located on a circle of radius one and are equally spaced on that circle

school_x = zeros(J,1);
school_y = zeros(J,1);

for jj=1:J
    school_x(jj) = cosd(90+((jj-1)/J)*360)./2;%cosd(90+((jj-1)/J)*360);
    school_y(jj) = sind(90+((jj-1)/J)*360)./2;%sind(90+((jj-1)/J)*360);
end

% 2/ Students' geographic coordinates
% -----------------------------------

% N.B.: students' coordinates are randomly distributed on a disc of radius 2
% Coordinates are randomly drawn for each Monte Carlo sample
% Source : http://www.mathworks.com/matlabcentral/answers/294-generate-random-points-inside-a-circle

% a) Student coordinates in the MC samples
rng(10) % to control random number generator
MC.theta = rand(1,I*M)*(2*pi); % theta: random angle
rng(11) % to control random number generator
MC.r = sqrt(rand(1,I*M))*2; % r: random radius
MC.stu_x = reshape(MC.r .* cos(MC.theta),I,M)./2; %reshape(MC.r .* cos(MC.theta),I,M); % X Coordinates
MC.stu_y = reshape(MC.r .* sin(MC.theta),I,M)./2; %reshape(MC.r .* sin(MC.theta),I,M) % Y Coordinates

% b) Student coordinates in the CUTOFF-DISTRIBUTION samples (which is used to determine cutoff distribution)
rng(1112) % to control random number generator
CD.theta = rand(1,I*M_CD)*(2*pi); % theta: random angle
rng(1113) % to control random number generator
CD.r = sqrt(rand(1,I*M_CD))*2; % r: random radius
CD.stu_x = reshape(CD.r .* cos(CD.theta),I,M_CD)./2; % X Coordinates
CD.stu_y = reshape(CD.r .* sin(CD.theta),I,M_CD)./2; % Y Coordinates

% Check
check_x = (MC.stu_x(:,1));
check_y = (MC.stu_y(:,1));

% Circle
circle_x = cos(0:0.01:2*pi);%2*cos(0:0.01:2*pi);
circle_y = sin(0:0.01:2*pi);%2*sin(0:0.01:2*pi);

% Map of simulated school district
figure('Name','Geographic Coordinates','NumberTitle','off')
axis equal;
%figuresize(80, 80, 'cm')
%axis([-2 2 -2 2]);
axis([-1 1 -1 1]);
legend_names_g=cell(2,1);
legend_names_g{1} = ['Applicants'];
legend_names_g{2} = ['Colleges'];
hold on;
plot(check_x,check_y,'.','Color','blue','MarkerSize',8);
plot(school_x,school_y,'.','Color','red','MarkerSize',25);
plot(circle_x,circle_y,'Color','black');
g_legend = legend(legend_names_g,'Location','northoutside','Orientation','horizontal');
set(g_legend,'FontSize',10);
hold off;

close all

% 3/ Distance to schools
% ----------------------

% a) In the preliminary samples
CD.distance_school = zeros(I,J,M_CD);
for ii=1:I % loop over students
    for jj=1:J % loop over schools
        for mm=1:M_CD % loop over MC samples
            CD.distance_school(ii,jj,mm) = sqrt( (CD.stu_x(ii,mm)-school_x(jj)).^2 + (CD.stu_y(ii,mm)-school_y(jj)).^2 );
        end
    end
end

% b) In the MC samples
MC.distance_school = zeros(I,J,M);
for ii=1:I % loop over students
    for jj=1:J % loop over schools
        for mm=1:M % loop over MC samples
            MC.distance_school(ii,jj,mm) = sqrt( (MC.stu_x(ii,mm)-school_x(jj)).^2 + (MC.stu_y(ii,mm)-school_y(jj)).^2 );
        end
    end
end

% ----------------------------------------------------------------------------------------------- %
%                                   1.5/ SCHOOL CHARACTERISTICS                                   %
% ----------------------------------------------------------------------------------------------- %

% School_char: IxJxM
School_char = repmat(Sch_special,I,1,M);
School_char_CD= repmat(Sch_special,I,1,M_CD);
School_small = repmat(Sch_small,I,1,M);
% School_small_CD= repmat(Sch_small,I,1,M_CD); % no need in true pref.

% ----------------------------------------------------------------------------------------------- %
%                                 1.6/ STUDENT PREFERENCES                                        %
% ----------------------------------------------------------------------------------------------- %

% Random utility model : U_ij = V_ij + E_ij
% Vij : Deterministic component
% E_ij: Random component (Type-I extreme value)

% 1/ V_ij: Deterministic component of utility
% -------------------------------------------

% MC.V_ij = repmat(FE,[I,1,M]) + coeff_dist .* MC.distance_school + coeff_char .* repmat(School_char,[I,1,M]).* repmat(repmat(MC.Stu_char,1,J),[1,1,M])  ;
MC.V_ij = repmat(FE,[I,1,M]) + coeff_dist .* MC.distance_school + coeff_char .* School_char .* repmat(reshape(MC.Stu_char,[I,1,M]),1,J,1);
% MC.V_ij = repmat(FE,[I,1,M]) + coeff_bin*repmat(odd,[I,1,M]) .* repmat(reshape(MC.Stu_bin,[I,1,M]),1,J,1) + coeff_dist .* MC.distance_school;

% 2/ E_ij: Idiosyncratic component of utility
% -------------------------------------------
% N.B.: E_ij are random type-I errors

rng(15)
MC.E_ij = -log(-log(rand(I,J,M)));

% 3/ U_ij: Utility of each school
% -------------------------------

MC.U_ij = MC.V_ij + MC.E_ij;

% Alternative structure (for compatibility with parfor)
MC_U_ij = permute(MC.U_ij, [3 2 1]);

% Check that all utilities are non-negative
MC.negative_utilities = MC.U_ij(MC.U_ij<0);

% % If a utility is found to be negative, exit
% if (isempty(MC.negative_utilities)==0)
%     disp('Error: negative utilities')
%     keyboard % To exit keyboard, type -dbquit-
% end


disp(' ')
disp('PART I: MODEL PARAMETERS')
disp('------------------------')
disp(' ')
disp(['# of students (I): ',num2str(I)])
disp(['# of schools (J): ',num2str(J)])
% disp(['# of allowed choices (A): ',num2str(A)])
disp(['# of MC samples: ',num2str(M)])
% disp(['Correlation between priorities across schools (rho): ',num2str(rho)])
% disp(['max s.d. of uncertainty on priorities (sigma): ',num2str(Sigma)])
disp(['School capacities: ',mat2str(Capacities')])
disp(['Total school capacities: ',mat2str(sum(Capacities))])
disp(['School char: ',mat2str(mean(squeeze(School_char(1,:,:)),2),2)])
disp(['School fixed effects: ',mat2str(FE)])
disp(['Coefficient on distance: ',num2str(coeff_dist)])
disp(['Coefficient on own type x school type: ',num2str(coeff_char)])
disp(['-- Part I execution time: ',num2str(toc(step_part1)),' seconds'])
clear step_part1;


% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%%                          PART II : DISTRIBUTION OF CUTOFFS                                     %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %

% Set timer Part 2
step_part2 = tic;

disp(' ')
disp('PART II: DISTRIBUTION OF CUTOFFS')
disp('-----------------------------------------')
disp(' ')

% ------------------------ %
% COMPLETE RANK ORDER LIST %
% ------------------------ %

disp('1) Create truth-telling lists...')

% N.B.: list of school ranks(LSR) is different from rank-orderd list (ROL)
% ROL: (IxJ) matrix where (i,j) = identifier of school ranked j-th in student i's preferences (from top = col1 to bottom = col J)
% LSR: (IxJ) matrix where (i,j) = rank of school j in student i's preferences (= 1 for most preferred)
% Source: % https://groups.google.com/forum/#!topic/comp.soft-sys.matlab/RMG2qGDqOeU

% Use different random shocks

% Random utility model : U_ij = V_ij + E_ij
% Vij : Deterministic component
% E_ij: Random component (Type-I extreme value)
rng(555)
CD.U_ij = repmat(FE,[I,1,M_CD]) + coeff_dist .* CD.distance_school + coeff_char .* School_char_CD .* repmat(reshape(CD.Stu_char,[I,1,M_CD]),1,J,1) + -log(-log(rand(I,J,M_CD)));

% Alternative structure (for compatibility with parfor)
CD_U_ij = permute(CD.U_ij, [3 2 1]);

[~, CD.ROL] = sort(CD.U_ij,2,'descend');

% ---------------------------------- %
% RUN DA TO DETERMINE SCHOOL CUTOFFS %
% ---------------------------------- %

disp(' ')
disp('2) Run DA to determine school cutoffs...')

CD.Cutoffs = NaN(J,M_CD);
CD.Assigned = NaN(M_CD,I);
CD.Matched =  NaN(M_CD,J,I);

% Compute students' and schools' ranked preferences

% Stu_rk_pref: Students' preferences ranked over schools
% (J,I,M) matrix where, for a given MC sample, (j,i) is the preference of student i for school j, in ranks (from 0 if unranked to J for most preferred)
% TT.Stu_rk_pref = zeros(I,J,M);
% temp = flipdim(TT.ROL,2);
% [A1, A2, A3]=ndgrid(1:I, 1:J, 1:M);
% A1(temp==0)=0;
% A2(temp==0)=0;
% A3(temp==0)=0;
% TT.Stu_rk_pref(sub2ind(size(temp), nonzeros(A1), nonzeros(temp), nonzeros(A3))) = nonzeros(A2);
CD.Stu_rk_pref = 1+abs(min(min(min(CD.U_ij))))+CD.U_ij; % equilvalent to ranking; 0 = unranked.

% Run Student-Proposing DA algorithm

CD_Assigned=NaN(M_CD,I);
CD_Matched=NaN(M_CD,J,I);
CD_Cutoffs=NaN(J,M_CD);

% Loop over CD (cutoff distribution) samples
parfor mm = 1:M_CD
    
    % Assigned: each student's assigned school
    % Matched: each school's assigned students
    [Assigned, Matched] = func_DA('stu', CD.Stu_rk_pref(:,:,mm)', repmat(CD.Priority',J,1), Capacities);
    
    % Find school cutoffs
    Cutoffs=NaN(J,1);
    for jj = 1:J % loop over schools
        Cutoffs(jj) = min( CD.Priority(Matched(jj,:) == 1) );
        % If school capacity is not filled, then cutoff is 0
        if sum(Matched(jj,:)) < Capacities(jj)
            Cutoffs(jj)=0;
        end
        % If the school's cutoff is the student with smallest priority, then cutoff is zero
        if Cutoffs(jj) == 1/I
            Cutoffs(jj)=0;
        end
    end
    CD_Assigned(mm,:) = Assigned;
    CD_Matched(mm,:,:) = Matched;
    CD_Cutoffs(:,mm) = Cutoffs;
end

CD.Assigned = CD_Assigned;
CD.Matched = CD_Matched;
CD.Cutoffs = CD_Cutoffs;

clear CD_Assigned CD_Matched CD_Cutoffs;
clear CD.Assigned CD.Matched

% ------------------------------------------------------------ %
% EMPIRICAL DISTRIBUTION OF CUTOFFS UNDER TRUTH-TELLING        %
% ------------------------------------------------------------ %

disp(' ')
disp('3) Empirical distribution of cutoffs under TT choice...')

% Non parametric distribution of cutoffs
figure('Name','Cutoffs given Truth-telling','NumberTitle','off')
xlabel('College Admission Cutoff');
axis([0 1 0 50])
xi = linspace(0,1,100);
f=zeros(J,100);
x=zeros(J,100);
legend_names=cell(1,J);
hold all;
for jj=1:J
    [f(jj,:),x(jj,:)] = ksdensity(CD.Cutoffs(jj,:),xi);
    % small schools [10; 5; 10; 10; 5; 10; 10; 5; 10; 10; 5; 10]
    if jj == 12
        plot(x(jj,:),f(jj,:),'m:','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 11          
        plot(x(jj,:),f(jj,:),'m','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 10          
        plot(x(jj,:),f(jj,:),'m--','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
        %
    elseif jj == 9
        plot(x(jj,:),f(jj,:),'b:','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 8          
        plot(x(jj,:),f(jj,:),'b','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 7          
        plot(x(jj,:),f(jj,:),'b--','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
        %
    elseif jj == 6
        plot(x(jj,:),f(jj,:),'g:','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 5          
        plot(x(jj,:),f(jj,:),'g','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 4          
        plot(x(jj,:),f(jj,:),'g--','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
        %
    elseif jj == 3
        plot(x(jj,:),f(jj,:),'r:','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 2          
        plot(x(jj,:),f(jj,:),'r','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    elseif jj == 1          
        plot(x(jj,:),f(jj,:),'r--','LineWidth',1.25);
        legend_names{jj} = ['College ' num2str(jj)];
    end       
end
legend(legend_names);
%hold off;

clear xi x f legend_names;

cd(folder_figures)
print('fig_E1_cutoffs','-djpeg')
print('fig_E1_cutoffs','-depsc2')
close all

disp(['-- Part II execution time: ',num2str(toc(step_part2)),' seconds'])
clear step_part2;

% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%%                          PART III : TRUTH-TELLING SIMULATIONS                                   %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %

% Set timer Part 3
step_part3 = tic;

disp(' ')
disp('PART III: TRUTH-TELLING SIMULATIONS')
disp('-----------------------------------------')
disp(' ')

% ------------------------ %
% COMPLETE RANK ORDER LIST %
% ------------------------- %

disp('1) Create truth-telling lists...')

% N.B.: list of school ranks(LSR) is different from rank-orderd list (ROL)
% ROL: (IxJ) matrix where (i,j) = identifier of school ranked j-th in student i's preferences (from top = col 1 to bottom = col J)
% LSR: (IxJ) matrix where (i,j) = rank of school j in student i's preferences (= 1 for most preferred)
% Source: % https://groups.google.com/forum/#!topic/comp.soft-sys.matlab/RMG2qGDqOeU
[~, TT.ROL] = sort(MC.U_ij,2,'descend');

% ---------------------------------- %
% RUN DA TO DETERMINE SCHOOL CUTOFFS %
% ---------------------------------- %

disp(' ')
disp('2) Run DA to determine school cutoffs...')

TT.Cutoffs = NaN(J,M);
TT.Assigned = NaN(M,I);
TT.Matched =  NaN(M,J,I);

% Compute students' and schools' ranked preferences

% Stu_rk_pref: Students' preferences ranked over schools
% (J,I,M) matrix where, for a given MC sample, (j,i) is the preference of student i for school j, in ranks (from 0 if unranked to J for most preferred)
% TT.Stu_rk_pref = zeros(I,J,M);
% temp = flipdim(TT.ROL,2);
% [A1, A2, A3]=ndgrid(1:I, 1:J, 1:M);
% A1(temp==0)=0;
% A2(temp==0)=0;
% A3(temp==0)=0;
% TT.Stu_rk_pref(sub2ind(size(temp), nonzeros(A1), nonzeros(temp), nonzeros(A3))) = nonzeros(A2);
TT.Stu_rk_pref = 1+abs(min(min(min(MC.U_ij))))+MC.U_ij;

% Run Student-Proposing DA algorithm

TT_Assigned=NaN(M,I);
TT_Matched=NaN(M,J,I);
TT_Cutoffs=NaN(J,M);

% Loop over MC samples
parfor mm = 1:M
    
    % Assigned: each student's assigned school
    % Matched: each school's assigned students
    [Assigned, Matched] = func_DA('stu', TT.Stu_rk_pref(:,:,mm)', repmat(MC.Priority',J,1), Capacities);
    
    % Find school cutoffs
    Cutoffs=NaN(J,1);
    for jj = 1:J % loop over schools
        Cutoffs(jj) = min( MC.Priority(Matched(jj,:) == 1) );
        % If school capacity is not filled, then cutoff is 0
        if sum(Matched(jj,:)) < Capacities(jj)
            Cutoffs(jj)=0;
        end
        % If the school's cutoff is the student with smallest priority, then cutoff is zero
        if Cutoffs(jj) == 1/I
            Cutoffs(jj)=0;
        end
    end
    TT_Assigned(mm,:) = Assigned;
    TT_Matched(mm,:,:) = Matched;
    TT_Cutoffs(:,mm) = Cutoffs;
end

TT.Assigned = TT_Assigned;
TT.Matched = TT_Matched;
TT.Cutoffs = TT_Cutoffs;

clear TT_Assigned TT_Matched TT_Cutoffs;

disp(['-- Part III execution time: ',num2str(toc(step_part3)),' seconds'])
clear step_part3;

% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%%                             PART IV : ADMISSION PROBABILITY WHEN TRUTH-TELLING                 %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %

% Set timer Part 4
step_part4 = tic;

disp(' ')
disp('PART IV : ADMISSION PROBABILITY WHEN TRUTH-TELLING')
disp('-----------------------------------------')
disp(' ')

% Probability of being accepted by each school given the true preference
% order
% ROL: (IxJ) matrix where (i,j) = identifier of school ranked j-th in student i's preferences (from top = col 1 to bottom = col J)

TT.PA = NaN(I,J,M);
TT.PA_reorder = TT.PA;
TT.PA_ro = TT.PA;
% Loop over MC samples
for mm = 1:M
    for ii=1:I
        % prob being accepted by one's first choice
        TT.PA(ii,1,mm) = sum(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,1,mm),:))/M_CD;
        
        TT.PA(ii,2,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,2,mm),:)))/M_CD;
        
        TT.PA(ii,3,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,3,mm),:)))/M_CD;
        
        TT.PA(ii,4,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,4,mm),:)))/M_CD;
        
        TT.PA(ii,5,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,5,mm),:)))/M_CD;
        
        TT.PA(ii,6,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,5,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,6,mm),:)))/M_CD;
        
        TT.PA(ii,7,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,5,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,6,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,7,mm),:)))/M_CD;
        
        TT.PA(ii,8,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,5,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,6,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,7,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,8,mm),:)))/M_CD;
        
        TT.PA(ii,9,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,5,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,6,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,7,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,8,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,9,mm),:)))/M_CD;
        
        TT.PA(ii,10,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,5,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,6,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,7,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,8,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,9,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,10,mm),:)))/M_CD;
        
        TT.PA(ii,11,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,5,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,6,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,7,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,8,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,9,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,10,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,11,mm),:)))/M_CD;
        
        TT.PA(ii,12,mm) = sum((MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,1,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,2,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,3,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,4,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,5,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,6,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,7,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,8,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,9,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,10,mm),:)) ...
            .*(MC.Priority(ii) < CD.Cutoffs(TT.ROL(ii,11,mm),:)) ...
            .*(MC.Priority(ii) >= CD.Cutoffs(TT.ROL(ii,12,mm),:)))/M_CD;
        
        TT.PA_ro(ii,TT.ROL(ii,:,mm),mm) = TT.PA(ii,:,mm);
        
    end
end

TT.PA_reorder = TT.PA_ro;

rng(50000)
TT_ran = rand(I,J,M); % to be used in assigning the mistakes
TT.PA_reorder(((TT.PA_ro == 0) .* (TT_ran <= 1/4)) == 1) = -2;
TT.PA_reorder(((TT.PA_ro == 0) .* (TT_ran >  1/4).*(TT_ran <= 5/6) == 1)) = -2;
TT.PA_reorder(((TT.PA_ro == 0) .* (TT_ran >  5/6) == 1)) = 2;

clear TT_ran 

% for those who have all admission probabilities = 0, keep a random number
% of them
rng(50001)
TT_ind = 1 - (0-floor(12*rand(I,M,12))).*(rand(I,M,12)>0.8);

for mm = 1:M
    for ii=1:I
        if sum(TT.PA_ro(ii,:,mm)) == 0
            %TT.PA_reorder(ii,TT.ROL(ii,1:2,mm),mm) = 2;
            TT.PA_reorder(ii,TT.ROL(ii,TT_ind(ii,mm,:),mm),mm) = 2;
        elseif max(TT.PA_ro(ii,:,mm)) > 0 && max(TT.PA_ro(ii,:,mm)) <= mistake_pp(P) % max admission prob to be skipped
            %TT.PA_reorder(ii,TT.ROL(ii,1:2,mm),mm) = 2;
            TT.PA_reorder(ii,TT.ROL(ii,TT_ind(ii,mm,:),mm),mm) = 2;
        end
    end
end

clear TT_ind

PA_dis = [(84:99)', prctile(reshape(TT.PA,[],1),84:99)'];


disp(['-- Part IV execution time: ',num2str(toc(step_part4)),' seconds'])
clear step_part4;

% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%%                             PART V :  MISTAKES                                                 %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %

% Set timer Part 5
step_part5 = tic;

disp(' ')
disp('PART V: MISTAKES')
disp('----------------------------------------')
disp(' ')

% prob(making a mistake is positively correlated with Stu_char)
rng(106);
rand1 = rand(I,M);
rng(107);
rand2 = rand(I,M);
rng(108);
rand3 = rand(I,M);

% Mis.Mistake = (rand1 > 0.2);
% Mis.Mistake = repmat(Mis.Mistake,[1,1,P]);
Mis.Mistake = (rand1 + rand2.* MC.Stu_char/4 > 0.4);
Mis.Mistake2 = Mis.Mistake.*(rand3>0.15);
Mis.Mistake3 = Mis.Mistake.*(rand3>0.05);
Mis.Mistake = repmat(Mis.Mistake,[1,1,P]);
Mis.Mistake(:,:,2) = Mis.Mistake2;
Mis.Mistake(:,:,3) = Mis.Mistake3;
clear Mis.Mistake2

Mis.ROL = NaN(I,J,M,P);
Mis.Stu_rk_pref = NaN(I,J,M,P);
for pp = 1:P
    for jj = 1:J
%         Mis.Stu_rk_pref(:,jj,:,pp) = ...
%             TT.Stu_rk_pref(:,jj,:).*(1 - (jj ~= reshape(TT.Assigned',[I,1,M]))...
%             .* (reshape(Mis.Mistake(:,:,pp),[I,1,M]) == 1) ...
%             .* (TT.PA_reorder(:,jj,:) <= mistake_pp(pp)) ...
%             .* (sum((TT.PA_reorder(:,:,:) <= mistake_pp(pp)),2) <= J-1) - ...
%             (jj == reshape(TT.Assigned',[I,1,M]))...
%             .* (reshape(Mis.Mistake(:,:,pp),[I,1,M]) == 1) ...
%             .* (sum((TT.PA_reorder(:,:,:) <= mistake_pp(pp)),2) < J-1));
        Mis.Stu_rk_pref(:,jj,:,pp) = ...
            TT.Stu_rk_pref(:,jj,:).*(1 - ...
             (reshape(Mis.Mistake(:,:,pp),[I,1,M]) == 1) ...
            .* (TT.PA_reorder(:,jj,:) <= mistake_pp(pp)));
    end
    [u_val, Mis.ROL(:,:,:,pp)] = sort(Mis.Stu_rk_pref(:,:,:,pp),2,'descend');
    Mis.ROL(:,:,:,pp) = Mis.ROL(:,:,:,pp) .* (u_val>0);
end


% Run Student-Proposing DA algorithm

Mis_Assigned=NaN(M,I,P);
Mis_Matched=NaN(M,J,I,P);
Mis_Cutoffs=NaN(J,M,P);

% Loop over MC samples
for mm = 1:M
    for pp = 1:P
        % Assigned: each student's assigned school
        % Matched: each school's assigned students
        [Assigned, Matched] = func_DA('stu', squeeze(Mis.Stu_rk_pref(:,:,mm,pp))', repmat(MC.Priority',J,1), Capacities);
        
        % Find school cutoffs
        Cutoffs=NaN(J,1);
        for jj = 1:J % loop over schools
            Cutoffs(jj) = min( MC.Priority(Matched(jj,:) == 1));
            % If school capacity is not filled, then cutoff is 0
            if sum(Matched(jj,:)) < Capacities(jj)
                Cutoffs(jj)=0;
            end
            % If the school's cutoff is the student with smallest priority, then cutoff is zero
            if Cutoffs(jj) == min(MC.Priority)
                Cutoffs(jj)=0;
            end
        end
        Mis_Assigned(mm,:,pp) = Assigned;
        Mis_Matched(mm,:,:,pp) = Matched;
        Mis_Cutoffs(:,mm,pp) = Cutoffs;
    end
end

Mis.Assigned = Mis_Assigned;
Mis.Matched = Mis_Matched;
Mis.Cutoffs = Mis_Cutoffs;

clear Mis_Assigned Mis_Matched Mis_Cutoffs


disp(['-- Part V execution time: ',num2str(toc(step_part5)),' seconds'])
clear step_part5;

% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%%                              PART VI: DATASET                                                  %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %
cd(folder_data)
save sim_data_counterfactual % save all for counterfactual analysis

% Set timer Part 5
step_part5 = tic;

disp(' ')
disp('PART V : DATASET')
disp('------------------------------------')
disp(' ')

clear var

disp('1) Build the dataset with mistakes')

DStruct = Mis;

run p1_1_data_construct_mistakes.m

clear sim_data_Mis;

% Simulated choice data
sim_data_Mis = horzcat(var.sample_id, var.stu_id, var.sch_id, var.sch_true_rank, var.sch_sub_rank, var.stu_eprio, var.stu_prio, ...
    var.sch_cutoff, var.sch_feas, var.sch_assignment, var.pp_mistake, var.sch_dist, var.nb_sch_ranked, ...
    var.sch_true_rank_feas, var.sch_sub_rank_feas, var.stu_assigned, var.stu_assigned_pref_feas, var.stu_truthful_feas, ...
    var.stu_assigned_pref, var.stu_topk, var.stu_top1, var.sch_assignment_unconstrained, var.Stu_char);

disp(' ')
disp('2) Data set: sim_data_Mis; description')

M = M*P;
run p1_2_data_description.m
M = NSim;

% save data
cd(folder_data)
save simulated_data Mis TT sim_data_* theta_true M P J I School_char School_small

disp(['-- Total execution time: ',num2str(toc(step_total_time)),' seconds'])
clear step_total_time